# load packages
library(tidyverse)
library(quanteda)
library(quanteda.textstats)
library(LSX)
library(rio)
library(peacesciencer)
library(countrycode)
library(sjPlot)
library(sjlabelled)
library(interplot)
library(flextable)
library(hrbrthemes)
library(cowplot)
library(patchwork)
library(corrplot)

# import the human rights texts
reports <- readRDS("reports.RDS") |> filter(year > 1976 & year < 2011)

# number of reports (Figure 1)
reports |> ggplot() +
  geom_bar(aes(x = year, fill = organization)) +
  labs(x = "Year", y = "Number of Reports", fill = "Organization") +
  scale_fill_brewer(palette = "Set1") +
  theme_bw() +
  theme(legend.position = "bottom")

# build a training corpus
set.seed(100)
reports_sample <- reports[sample(nrow(reports), 2500), ]
corp <- corpus(reports_sample, text_field = "text")

# tokenization
toks <- corp |> corpus_reshape("sentences") |>
  tokens(remove_punct = TRUE) |>
  tokens_remove(stopwords("en"), padding = TRUE)

# dfm
dfm <- toks |> dfm() |>
  dfm_select("^\\p{L}+$", valuetype = "regex", min_nchar = 2) |>
  dfm_trim(min_termfreq = 5)

# create women's rights context terms
women <- char_context(toks, "women", p = 0.05)

# set the seed words
seeds <- c(rep(1, 7), rep(-1, 7))
names(seeds) <- c("poor", "problem*", "violat*", "discrimination", "bad", "serious", "suffer*", "good", "nice", "excellent", "correct", "improv*", "promot*", "support*")

# frequency of seed words (Table B1)
dfm_seeds <- dfm_keep(dfm, names(seeds))
freq_seeds <- textstat_frequency(dfm_seeds)
freq_seeds

# fit the LSS model
lss <- textmodel_lss(dfm, seeds = seeds, terms = women)

# visualize model (Figure 2)
textplot_terms(lss, highlighted =
                 c("educate", "fear", "unequal", "discrimination", "economic",
                   "marriage", "employment", "serious", "problems",
                   "domestic", "enhance", "violence", "acute", "prevent"))

# dfm of all reports
dfm_doc <- corpus(reports, text_field = "text") |>
  tokens(remove_punct = TRUE) |>
  tokens_remove(stopwords("en"), padding = TRUE) |>
  dfm()

# predict shaming scores
pred <- as.data.frame(predict(lss, se_fit = TRUE, newdata = dfm_doc))
pred$year <- docvars(dfm_doc, "year")
pred$COWcode <- docvars(dfm_doc, "country_cow_code")
pred$organization <- docvars(dfm_doc, "organization")

# temporal changes (Figure C1)
pred |> ggplot() +
  geom_point(aes(year, fit, color = organization), alpha = 0.2) +
  geom_smooth(aes(year, fit, color = organization)) +
  ylim(-3, 3) +
  labs(x = "Year", y = "Women's Rights Shaming Score", color = "Organization") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() +
  theme(legend.position = "bottom")

# construct the women's rights shaming score
shaming <- pred |> group_by(year, COWcode) |>
  summarise(shaming = sum(fit)) |>
  ungroup()

# import the Murdie and Peksen (2015) data
mp2015 <- import("Murdie_Peksen_2015_clean.dta") 

# correlation with Murdie and Peksen (2015)
mp2015 <- left_join(mp2015, shaming, by = c("year", "ccode" = "COWcode"))
mp2015b <- mp2015 |> dplyr::select(year, ccode, whrnoposmgoldtotal, wrightscombined, wecon, wopol, wosoc, shaming) |> filter(year > 1976)
cor(mp2015b$whrnoposmgoldtotal, mp2015b$shaming, use = "complete.obs")

# distribution of Murdie and Peksen's measure and our measure (Figure D1)
fig_murdie <- mp2015b |> ggplot() +
  geom_histogram(aes(whrnoposmgoldtotal)) +
  labs(x = "WRO Shaming", y = "Count", subtitle = "Murdie and Peksen's (2025) measure") +
  theme_bw()
fig_wrs <- mp2015b |> ggplot() +
  geom_histogram(aes(shaming)) +
  labs(x = "Women's Rights Shaming Score", y = "Count", subtitle = "Our measure") +
  theme_bw()
fig_murdie + fig_wrs

# import the CIRI Human Rights Data
ciri <- read_csv("CIRI Data 1981_2011 2014.04.14.csv")

# correlation analysis (Table D1)
ciri <- left_join(ciri, shaming, by = c("YEAR" = "year", "COW" = "COWcode"))
ciri2 <- ciri |> dplyr::select(WECON, WOPOL, WOSOC, shaming) |> na.omit() |>
  filter(WECON %in% 0:3 & WOPOL %in% 0:3 & WOSOC %in% 0:3)
corrplot.mixed(cor(ciri2), order = 'AOE', number.cex = 1.5, tl.cex = 1.5, cl.cex = 1.25)

# Within-country variance of different measures (Table D2)
ciri3 <- ciri |> dplyr::select(CTRY, WECON, WOPOL, WOSOC, shaming) |> na.omit() |>
  filter(WECON %in% 0:3 & WOPOL %in% 0:3 & WOSOC %in% 0:3)

group_variance <- tapply(ciri3$WECON, ciri3$CTRY, var)
mean(group_variance, na.rm = TRUE)

group_variance <- tapply(ciri3$WOPOL, ciri3$CTRY, var)
mean(group_variance, na.rm = TRUE)

group_variance <- tapply(ciri3$WOSOC, ciri3$CTRY, var)
mean(group_variance, na.rm = TRUE)

group_variance <- tapply(ciri3$shaming, ciri3$CTRY, var)
mean(group_variance, na.rm = TRUE)

# import the vdem data
vdem <- readRDS("V-Dem-CY-Full+Others-v13.rds") |>
  dplyr::select(country_name, COWcode, year, v2x_gender) |>
  filter(year > 1976) |> na.omit()

# import the world bank data
wb_data <- read_csv("wb_data.csv")

# The Geddes Wright and Frantz Autocratic Regimes dataset
gwf <- read_csv("gwf2.csv")

# create the civil war data
civwar <- create_stateyears(system = "gw") %>%
  add_ucdp_acd(type = "intrastate") %>%
  add_ccode_to_gw() %>%
  mutate(civwar = ucdpongoing) %>%
  dplyr::select(ccode, year, civwar) %>% distinct()

# create the international war data
intwar <- create_stateyears(system = "gw") %>%
  add_ucdp_acd(type = "interstate") %>%
  add_ccode_to_gw() %>%
  mutate(intwar = ucdpongoing) %>%
  dplyr::select(ccode, year, intwar) %>% distinct()

# import country code
countries <- codelist_panel %>% dplyr::select(year, cown, iso2c)

# construct tidy data
data <- tibble(
  COWcode = rep(unique(reports$country_cow_code), each = length(1977:2011)),
  year = rep(1977:2011, length(unique(reports$country_cow_code)))
)

data <- data |>
  left_join(shaming, by = c("COWcode", "year")) |>
  left_join(vdem, by = c("COWcode", "year")) |>
  left_join(countries, by = c("year", "COWcode" = "cown")) |>
  left_join(wb_data, by = c("year", "iso2c"), na_matches = "never") |>
  left_join(civwar, by = c("COWcode" = "ccode", "year")) |>
  left_join(intwar, by = c("COWcode" = "ccode", "year")) |>
  left_join(gwf, by = c("year", "COWcode" = "country_cow_code")) |>
  mutate(shaming = ifelse(is.na(shaming), 0, shaming))

# generate leading dependent variables and logged variables
data <- data |> group_by(country_name) |> mutate(
  v2x_gender_lead1 = dplyr::lead(v2x_gender),
  v2x_gender_lead2 = dplyr::lead(v2x_gender, n = 2),
  v2x_gender_lead3 = dplyr::lead(v2x_gender, n = 3),
  ln_pop = log(pop),
  ln_gdppc = log(gdppc),
  oda_gdp = oda / (gdppc * pop)
) |> ungroup()

# Summary Statistics (Table E1)
summary2 <- function(x) {
  N <- map_int(x, function(x) sum(!is.na(x)))
  Mean <- map_dbl(x, mean, na.rm = TRUE)
  S.D. <- map_dbl(x, sd, na.rm = TRUE)
  Min. <- map_dbl(x, min, na.rm = TRUE)
  Max. <- map_dbl(x, max, na.rm = TRUE)
  result <- tibble(Variable = colnames(x), N, Mean = Mean, S.D. = S.D., Min. = Min., Max. = Max.)
  result
}

sum_stats <- data %>%
  dplyr::select(v2x_gender, shaming, autocracy, ln_pop, ln_gdppc, gdp_growth, civwar, intwar) %>%
  summary2() %>% flextable() %>%
  colformat_double(j = 3:6, digits = 2)

sum_stats

# label the variables
data <- data %>%
  var_labels(ln_pop = "log Population",
             ln_gdppc = "log GDP Per Capita",
             gdp_growth = "GDP Growth",
             civwar = "Civil War",
             intwar = "International War")

# main analysis (Table 2)
model1 <- lm(v2x_gender_lead1 ~ shaming * autocracy + ln_pop + ln_gdppc + gdp_growth + civwar + intwar, data = data)

data_sub <- data |> filter(autocracy == 1)

model2 <- lm(v2x_gender_lead1 ~ shaming * gwf_party + ln_pop + ln_gdppc + gdp_growth + civwar + intwar, data = data_sub)

model3 <- lm(v2x_gender_lead1 ~ shaming * gwf_military + ln_pop + ln_gdppc + gdp_growth + civwar + intwar, data = data_sub)

model4 <- lm(v2x_gender_lead1 ~ shaming * gwf_monarchy + ln_pop + ln_gdppc + gdp_growth + civwar + intwar, data = data_sub)

model5 <- lm(v2x_gender_lead1 ~ shaming * gwf_personal + ln_pop + ln_gdppc + gdp_growth + civwar + intwar, data = data_sub)

tab_model(model1, model2, model3, model4, model5, collapse.se = TRUE, show.ci = FALSE, p.style = "stars", transform = NULL, digits = 5)

# marginal effects (Figure 3)
me2 <- interplot(model2, var1 = "shaming", var2 = "gwf_party") +
  labs(x = "(a)", y = "Marginal Effects") +
  ylim(c(-0.01, 0.07)) +
  scale_x_continuous(breaks = c(0, 1), labels = c("Other", "Party")) +
  theme_bw() +
  theme(plot.margin = margin(2, 5, 2, 2, "mm"))

me3 <- interplot(model3, var1 = "shaming", var2 = "gwf_military") +
  labs(x = "(b)", y = NULL) +
  ylim(c(-0.01, 0.07)) +
  scale_x_continuous(breaks = c(0, 1), labels = c("Other", "Military")) +
  theme_bw() +
  theme(plot.margin = margin(2, 5, 2, 2, "mm"))

me4 <- interplot(model4, var1 = "shaming", var2 = "gwf_monarchy") +
  labs(x = "(c)", y = "Marginal Effects") +
  ylim(c(-0.01, 0.07)) +
  scale_x_continuous(breaks = c(0, 1), labels = c("Other", "Monarchy")) +
  theme_bw() +
  theme(plot.margin = margin(2, 5, 2, 2, "mm"))

me5 <- interplot(model5, var1 = "shaming", var2 = "gwf_personal") +
  labs(x = "(d)", y = NULL) +
  ylim(c(-0.01, 0.07)) +
  scale_x_continuous(breaks = c(0, 1), labels = c("Other", "Personal")) +
  theme_bw() +
  theme(plot.margin = margin(2, 5, 2, 2, "mm"))

me2 + me3 + me4 + me5

# size of effect
(coef(model2)[2] + coef(model2)[9]) * sd(data$shaming)
(coef(model4)[2] + coef(model4)[9]) * sd(data$shaming)

# robustness check (Table F1)
model1 <- lm(v2x_gender_lead2 ~ shaming * autocracy + ln_pop + ln_gdppc + gdp_growth + civwar + intwar, data = data)
model2 <- lm(v2x_gender_lead2 ~ shaming * gwf_party + ln_pop + ln_gdppc + gdp_growth + civwar + intwar, data = data_sub)
model3 <- lm(v2x_gender_lead2 ~ shaming * gwf_military + ln_pop + ln_gdppc + gdp_growth + civwar + intwar, data = data_sub)
model4 <- lm(v2x_gender_lead2 ~ shaming * gwf_monarchy + ln_pop + ln_gdppc + gdp_growth + civwar + intwar, data = data_sub)
model5 <- lm(v2x_gender_lead2 ~ shaming * gwf_personal + ln_pop + ln_gdppc + gdp_growth + civwar + intwar, data = data_sub)
tab_model(model1, model2, model3, model4, model5, collapse.se = TRUE, show.ci = FALSE, p.style = "stars", transform = NULL, digits = 5)

# robustness check (Table F2)
model1 <- lm(v2x_gender_lead3 ~ shaming * autocracy + ln_pop + ln_gdppc + gdp_growth + civwar + intwar, data = data)
model2 <- lm(v2x_gender_lead3 ~ shaming * gwf_party + ln_pop + ln_gdppc + gdp_growth + civwar + intwar, data = data_sub)
model3 <- lm(v2x_gender_lead3 ~ shaming * gwf_military + ln_pop + ln_gdppc + gdp_growth + civwar + intwar, data = data_sub)
model4 <- lm(v2x_gender_lead3 ~ shaming * gwf_monarchy + ln_pop + ln_gdppc + gdp_growth + civwar + intwar, data = data_sub)
model5 <- lm(v2x_gender_lead3 ~ shaming * gwf_personal + ln_pop + ln_gdppc + gdp_growth + civwar + intwar, data = data_sub)
tab_model(model1, model2, model3, model4, model5, collapse.se = TRUE, show.ci = FALSE, p.style = "stars", transform = NULL, digits = 5)

# robustness check (Table F3)
model1 <- lm(v2x_gender_lead1 ~ shaming * autocracy + ln_pop + ln_gdppc + gdp_growth + oda_gdp + civwar + intwar, data = data)
model2 <- lm(v2x_gender_lead1~ shaming * gwf_party + ln_pop + ln_gdppc + gdp_growth + oda_gdp + civwar + intwar, data = data_sub)
model3 <- lm(v2x_gender_lead1 ~ shaming * gwf_military + ln_pop + ln_gdppc + gdp_growth + oda_gdp + civwar + intwar, data = data_sub)
model4 <- lm(v2x_gender_lead1~ shaming * gwf_monarchy + ln_pop + ln_gdppc + gdp_growth + oda_gdp + civwar + intwar, data = data_sub)
model5 <- lm(v2x_gender_lead1 ~ shaming * gwf_personal + ln_pop + ln_gdppc + gdp_growth + oda_gdp + civwar + intwar, data = data_sub)
tab_model(model1, model2, model3, model4, model5, collapse.se = TRUE, show.ci = FALSE, p.style = "stars", transform = NULL, digits = 5)
